# encoding:utf-8

import requests
import base64
import sys
import os
import json
import re

'''
General-purpose text recognition (OCR)
'''
def cleantxt(raw):
    """Replace each run of disallowed characters in ``raw`` with one space.

    Characters that survive are ASCII digits and letters, code points
    U+4E00 through U+9FA5, the underscore, and the punctuation marks
    listed inside the character class below. Any maximal run of other
    characters collapses into a single ASCII space.
    """
    # Negated character class: matches one or more characters NOT kept.
    disallowed_run = u'[^0-9a-zA-Z\u4e00-\u9fa5.，,。？“”《》_（）！；：]+'
    return re.sub(disallowed_run, ' ', raw, flags=re.UNICODE)

def ocr(path, access_token=None):
    """Submit the image at ``path`` to the general_basic OCR endpoint.

    The file is read in binary mode, base64-encoded, and POSTed as the
    ``image`` form field to ``aip.baidubce.com``.

    Args:
        path: Filesystem path of the image file to send.
        access_token: Token appended to the request URL as the
            ``access_token`` query parameter. When omitted, the token
            that was historically hard-coded in this function is used.

    Returns:
        The decoded JSON body of the response when the response is
        truthy (requests treats a status code below 400 as OK);
        ``None`` otherwise.
    """
    if access_token is None:
        # NOTE(review): credential embedded in source. Prefer passing
        # ``access_token`` explicitly (e.g. loaded from configuration);
        # this default may well have expired -- confirm before relying on it.
        access_token = "24.dea95b03c6267989f6b6c7174955d8ae.2592000.1595991917.282335-20641856"

    request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic"
    request_url = request_url + "?access_token=" + access_token

    # Context manager guarantees the file handle is closed even if the
    # read fails; the original left the handle open.
    with open(path, 'rb') as image_file:
        img = base64.b64encode(image_file.read())

    params = {"image": img}
    headers = {'content-type': 'application/x-www-form-urlencoded'}
    response = requests.post(request_url, data=params, headers=headers)

    # A Response is truthy only when its status is OK; error statuses
    # yield None, as before, rather than raising.
    if response:
        return response.json()
    return None

